Overview

This document integrates analysis of DRIFTS DPT files from OPUSprocessing.Rmd and Leila_code_modified.Rmd.

  1. OPUSprocessing.Rmd “Reading DPT files” section - Ridge plots showing spectral data by crop at different timepoints
  2. Leila_code_modified.Rmd - Bar plots showing functional group proportions

Preparing data for analysis

Importing DPT data

This code is modified from code by Stephany Soledad Chacon, “Box> Salk Institute Project> Salk Data> FTIR_Intact_decomposition_pots> FTIR_analysis.Rmd”. Using data in the DPT format, we can extrapolate sample attributes from the file names, then normalize and visualize spectra across replicates, crops, and timepoints. The width of the line bounding the spectra represents the range of the data.

# Check if folder exists and list files
if (!dir.exists(dpt_folder)) {
  stop("Directory does not exist: ", dpt_folder)
}

# List and sort files
cat("Files found in directory:\n")
## Files found in directory:
print(list.files(dpt_folder))
##  [1] "DRIFTS_pot001_13Cwheat_wk0_root_250725.0.dpt"        
##  [2] "DRIFTS_pot012_13Csoy_wk10_root_250827.0.dpt"         
##  [3] "DRIFTS_pot029_13Cwheat_wk10_root_250827.0.dpt"       
##  [4] "DRIFTS_pot029_13Cwheat_wk40_root_250827.3.dpt"       
##  [5] "DRIFTS_pot030_13Csoy_wk10_root_250725.0.dpt"         
##  [6] "DRIFTS_pot030_13Csoy_wk40_root_250827.0.dpt"         
##  [7] "DRIFTS_pot032_13Cwheat_wk0_root_250725.0.dpt"        
##  [8] "DRIFTS_pot047_13Crice_wk0_root_250725.0.dpt"         
##  [9] "DRIFTS_pot050_13Csoy_wk0_root_250827.0.dpt"          
## [10] "DRIFTS_pot052_13CnoPlant_wk30_root_250919.0.dpt"     
## [11] "DRIFTS_pot052_13CnoPlant_wk40_root_250919.0.dpt"     
## [12] "DRIFTS_pot053_13Cwheat_wk10_root_250725.0.dpt"       
## [13] "DRIFTS_pot053_13Cwheat_wk40_root_250919.0.dpt"       
## [14] "DRIFTS_pot055_13Crice_wk10_root_250725.0.dpt"        
## [15] "DRIFTS_pot055_13Crice_wk10_root_qmark_250725.0.dpt"  
## [16] "DRIFTS_pot055_13Crice_wk40_root_250827.0.dpt"        
## [17] "DRIFTS_pot056_12Csoy_wk10_root_250919.0.dpt"         
## [18] "DRIFTS_pot062_13Csoy_wk0_root_250725.0.dpt"          
## [19] "DRIFTS_pot079_13Crice_wk0_root_250725.0.dpt"         
## [20] "DRIFTS_pot080_13Csoy_wk10_root_250725.0.dpt"         
## [21] "DRIFTS_pot080_13Csoy_wk40_root_250919.0.dpt"         
## [22] "DRIFTS_pot086_12Csoy_wk0_root_250919.0.dpt"          
## [23] "DRIFTS_pot087_13Crice_wk10_root_250827.0.dpt"        
## [24] "DRIFTS_pot087_13Crice_wk40_root_250919.0.dpt"        
## [25] "DRIFTS_pot094_13Csoy_wk10_root_250725.0.dpt"         
## [26] "DRIFTS_pot094_13Csoy_wk10_soil_250630.0.dpt"         
## [27] "DRIFTS_pot094_13Csoy_wk40_root_250919.0.dpt"         
## [28] "DRIFTS_pot103_13Crice_wk0_root_recNMR_250725.0.dpt"  
## [29] "DRIFTS_pot103_13Crice_wk0_root_vial1_250725.0.dpt"   
## [30] "DRIFTS_pot103_13Crice_wk0_root_vial2_250725.2.dpt"   
## [31] "DRIFTS_pot103_13Crice_wk0_root_vial3_250725.0.dpt"   
## [32] "DRIFTS_pot107_12Crice_wk0_root_really13_250725.0.dpt"
## [33] "DRIFTS_pot109_13Cwheat_wk40_root_250919.0.dpt"       
## [34] "DRIFTS_pot119_13Crice_wk10_root_250827.0.dpt"        
## [35] "DRIFTS_pot119_13Crice_wk40_root_250919.0.dpt"
DPTfiles <- sort(list.files(dpt_folder, pattern = ".dpt"))
cat("Number of .dpt files:", length(DPTfiles), "\n")
## Number of .dpt files: 35
# Read files individually and combine
dpt_data_list <- list()

for (i in seq_along(DPTfiles)) {
  file_path <- file.path(dpt_folder, DPTfiles[i])
  
  # Read as lines and parse manually (DPT files are space-delimited)
  lines <- readLines(file_path, warn = FALSE)
  lines <- lines[lines != ""]  # Remove empty lines
  
  # Split each line by whitespace
  split_lines <- strsplit(lines, "\\s+")
  
  # Convert to data frame
  temp_data <- tryCatch({
    data.frame(
      wavenumber = as.numeric(sapply(split_lines, `[`, 1)),
      absorbance = as.numeric(sapply(split_lines, `[`, 2)),
      stringsAsFactors = FALSE
    )
  }, error = function(e) NULL)
  
  if (!is.null(temp_data)) {
    # Add filename column
    temp_data$filename <- DPTfiles[i]
    
    # Store in list
    dpt_data_list[[i]] <- temp_data
  }
}

# Remove NULL entries (failed reads)
dpt_data_list <- dpt_data_list[!sapply(dpt_data_list, is.null)]

# Combine all data
dpt_data <- bind_rows(dpt_data_list)

# Remove any rows with NA values
dpt_data <- dpt_data[complete.cases(dpt_data), ]

# Extract metadata from filenames
dpt_data <- dpt_data %>%
  mutate(
    crop = case_when(
      str_detect(filename, "noPlant") ~ "noPlant",
      str_detect(filename, "wheat") ~ "wheat", 
      str_detect(filename, "rice") ~ "rice",
      str_detect(filename, "soy") ~ "soy",
      TRUE ~ NA_character_
    ),
    timepoint = str_extract(filename, "wk\\d+"),
    sample_type = case_when(
      str_detect(filename, "root") ~ "root",
      str_detect(filename, "soil") ~ "soil", 
      TRUE ~ NA_character_
    ),
    # Extract ID (pot number) from filename
    ID = case_when(
      str_detect(filename, "pot[0-9]+") ~ str_extract(filename, "pot[0-9]+") %>% 
                                          str_remove("pot") %>% 
                                          str_pad(width = 3, pad = "0"),
      # Alternative pattern for files without 'pot' prefix
      str_detect(filename, "DRIFTS_[0-9]+") ~ str_extract(filename, "DRIFTS_[0-9]+") %>%
                                              str_remove("DRIFTS_") %>%
                                              str_pad(width = 3, pad = "0"),
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(sample_type)) %>%  # Remove files without clear sample type
  filter(!str_detect(filename, "KBr")) %>%  # Remove KBr files
  # Filter out anomalous spectra
  filter(!filename %in% c("DRIFTS_pot103_13Crice_wk0_root_recNMR_250725.0.dpt",
                          "DRIFTS_pot103_13Crice_wk0_root_vial1_250725.0.dpt", 
                          "DRIFTS_pot079_13Crice_wk0_root_250725.0.dpt",
                          "DRIFTS_pot047_13Crice_wk0_root_250725.0.dpt",
                          "DRIFTS_pot086_12Csoy_wk0_root_250919.0.dpt",
                          "DRIFTS_pot056_12Csoy_wk10_root_250919.0.dpt",
                          "DRIFTS_pot030_13Csoy_wk10_root_250725.0.dpt"))
# save as csv for record-keeping
write.csv(dpt_data, file.path(outputs_folder, "dpt_data.csv"), row.names = FALSE)

# Filter for root samples only
dpt_data_roots <- dpt_data %>% 
  filter(sample_type == "root") %>%
  filter(!is.na(crop))

cat("Root samples by crop:\n")
## Root samples by crop:
print(table(dpt_data_roots$crop))
## 
## noPlant    rice     soy   wheat 
##    5598   27990   22392   19593
cat("Root samples by timepoint:\n") 
## Root samples by timepoint:
print(table(dpt_data_roots$timepoint))
## 
##   wk0  wk10  wk30  wk40 
## 19593 25191  2799 27990

Normalizing data

This code is modified from code by Stephany Soledad Chacon, “Box> Salk Institute Project> Salk Data> FTIR_Intact_decomposition_pots> FTIR_analysis.Rmd”. It creates a new column with normalized absorbance values for each sample, using baseline correction (subtracting the minimum) followed by normalization to the maximum absorbance value.

# Normalize absorbance data by sample (same approach as OPUSprocessing.Rmd)
dpt_data_roots <- dpt_data_roots %>%
  group_by(filename) %>%
  mutate(
    # Baseline correction - subtract minimum
    baseline_corrected = absorbance - min(absorbance, na.rm = TRUE),
    # Normalize to maximum
    normalized_absorbance = baseline_corrected / max(baseline_corrected, na.rm = TRUE)
  ) %>%
  ungroup()

Plotting DRIFTS spectra

Plotting by timepoint

Ridge plots

These plots are of normalized absorbance values. The thickness of the opaque line represents the range of the data. Annotations indicate integration regions used for calculations of simple plant, complex plant, and microbial organic matter proportions.

Simple plant (aliphatic): 2976 - 2898 cm-1 + 2870 - 2839 cm-1
Microbial: 1660 - 1580 cm-1
Complex plant (aromatic): 1550 - 1500 cm-1
# Generic function to create ridge plots for any timepoint
create_timepoint_ridge_plot <- function(data, timepoint_week, title = NULL) {
  # Filter data for specific timepoint
  filtered_data <- data %>%
    filter(timepoint == timepoint_week) %>%
    mutate(crop = factor(crop, levels = c("noPlant", "wheat", "rice", "soy")))
  
  # Create title if not provided
  if (is.null(title)) {
    week_num <- str_extract(timepoint_week, "\\d+")
    title <- paste("Roots at week", week_num)
  }
  
  # Determine y-position for annotations based on data
  max_y <- max(filtered_data$normalized_absorbance)*(length(unique(filtered_data$crop))+3)
  
  # Create the plot
  ridge_plot <- filtered_data %>%
    ggplot(mapping = aes(x = wavenumber,
                         y = crop,
                         height = normalized_absorbance,
                         group = crop,
                         fill = crop,
                         color = crop)) +
    geom_density_ridges(stat = "identity",
                        scale = 3,
                        alpha = 0.25) +
    geom_vline(xintercept = 1500, linetype = "dotted", linewidth = 0.23) +
    geom_vline(xintercept = 1550, linetype = "dotted", linewidth = 0.23) +
    geom_vline(xintercept = 1580, linetype = "dashed", linewidth = 0.23) +
    geom_vline(xintercept = 1660, linetype = "dashed", linewidth = 0.23) +
    geom_vline(xintercept = 2839, linetype = "longdash", linewidth = 0.23) +
    geom_vline(xintercept = 2870, linetype = "longdash", linewidth = 0.23) +
    geom_vline(xintercept = 2898, linetype = "longdash", linewidth = 0.23) +
    geom_vline(xintercept = 2976, linetype = "longdash", linewidth = 0.23) +
    xlim(3777, 422) +
    scale_y_discrete(expand = expansion(mult = c(0.0, 0.56))) +  # Less space below, more above
    theme_classic() +
    theme(axis.text.y = element_text(size = 12),
          legend.position = "none") +
    scale_fill_manual(values = crop_colors) +
    scale_color_manual(values = crop_colors) +
    ggtitle(title) +
    labs(x = expression("wavenumber (cm"^-1*")"), y = "") +
       # spacer
    annotate("text", x = 1900, y = max_y + 0.2, label = " ", 
             size = 3, hjust = 0.5) +
    # Add simple plant region annotation
    annotate("rect", xmin = 2976, xmax = 2898, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#e3c8b1") +
    annotate("rect", xmin = 2870, xmax = 2839, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#e3c8b1") +
    annotate("text", x = 2560, y = max_y, label = "Simple Plant", 
             size = 3.3, hjust = 0.5) +
    # Add microbial region annotation
    annotate("rect", xmin = 1660, xmax = 1580, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#a8cbad") +
    annotate("text", x = 1870, y = max_y + 0.11, label = "Microbial", 
             size = 3.3, hjust = 0.5) +
    # Add complex plant region annotation
    annotate("rect", xmin = 1550, xmax = 1500, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#beb5d5") +
    annotate("text", x = 1190, y = max_y + 0.11, label = "Complex Plant", 
             size = 3.3, hjust = 0.5)
  
  return(list(plot = ridge_plot, data = filtered_data))
}

# Generic function to create sample count tables
create_sample_count_table <- function(filtered_data, timepoint_week) {
  week_num <- str_extract(timepoint_week, "\\d+")
  col_name <- paste("Week", week_num, "samples")
  
  counts <- filtered_data %>%
    select(filename, crop) %>%
    distinct() %>%
    count(crop) %>%
    arrange(match(crop, c("noPlant", "wheat", "rice", "soy"))) %>%
    mutate(!!col_name := paste(n, "&nbsp;&nbsp;", crop)) %>%
    select(all_of(col_name))
  
  return(counts)
}

# Create plots and tables for each timepoint
timepoints <- sort(unique(dpt_data_roots$timepoint))

for (tp in timepoints) {
  # Create plot and get filtered data
  result <- create_timepoint_ridge_plot(dpt_data_roots, tp)
  
  # Print the plot
  print(result$plot)
  
  # Create and print sample count table with results='asis'
  counts_table <- create_sample_count_table(result$data, tp)
  cat(knitr::kable(counts_table, format = "html", escape = FALSE))
  cat("<br><br>")  # Add HTML line breaks for spacing
}
Week 0 samples
2    wheat
3    rice
2    soy


Week 10 samples
2    wheat
4    rice
3    soy


Week 30 samples
1    noPlant


Week 40 samples
1    noPlant
3    wheat
3    rice
3    soy



Line plots

# Generic function to create line plots for any timepoint
create_timepoint_line_plot <- function(data, timepoint_week, title = NULL) {
  # Filter data for specific timepoint
  filtered_data <- data %>%
    filter(timepoint == timepoint_week) %>%
    mutate(crop = factor(crop, levels = c("noPlant", "wheat", "rice", "soy")))
  
  # Create title if not provided
  if (is.null(title)) {
    week_num <- str_extract(timepoint_week, "\\d+")
    title <- paste("Roots at week", week_num)
  }
  
  # Create the plot - offset lines vertically by crop like ridge plots
  # Only include crops that are actually present in the filtered data
  all_crop_levels <- c("noPlant", "wheat", "rice", "soy")
  present_crops <- intersect(all_crop_levels, unique(filtered_data$crop))
  len <- length(present_crops)
  # Scale the data to match ridge plot proportions
  max_abs <- max(filtered_data$normalized_absorbance, na.rm = TRUE)
  # Determine y-position for annotations based on data
  text_y <- len * 3 + 1.75
  # Create offsets only for crops that are present
  crop_offsets <- setNames(seq(0, (len-1) * 3, 3), present_crops)

  # Add offset to data
  filtered_data_offset <- filtered_data %>%
    mutate(crop_offset = crop_offsets[as.character(crop)],
           normalized_absorbance_offset = (normalized_absorbance*4.444 + crop_offset))
  line_plot <- filtered_data_offset %>%
    ggplot(mapping = aes(x = wavenumber,
                         y = normalized_absorbance_offset,
                         group = filename,
                         color = crop)) +
    xlim(3777, 422) +
    geom_line(alpha = 0.7) +
    scale_y_continuous(
      breaks = crop_offsets,
      labels = names(crop_offsets) # Reduce top space from 0.56 to 0.15
    ) +
    theme_classic() +
    theme(axis.text.y = element_text(size = 12),
          legend.position = "none") +
    scale_fill_manual(values = crop_colors) +
    scale_color_manual(values = crop_colors) +
    ggtitle(title) +
    labs(x = expression("Wavenumber (cm"^-1*")"), 
         y = "", 
         color = "Crop") +
    # Keep your existing vertical lines and annotations
    geom_vline(xintercept = 1500, linetype = "dotted", linewidth = 0.23) +
    geom_vline(xintercept = 1550, linetype = "dotted", linewidth = 0.23) +
    geom_vline(xintercept = 1580, linetype = "dashed", linewidth = 0.23) +
    geom_vline(xintercept = 1660, linetype = "dashed", linewidth = 0.23) +
    geom_vline(xintercept = 2839, linetype = "longdash", linewidth = 0.23) +
    geom_vline(xintercept = 2870, linetype = "longdash", linewidth = 0.23) +
    geom_vline(xintercept = 2898, linetype = "longdash", linewidth = 0.23) +
    geom_vline(xintercept = 2976, linetype = "longdash", linewidth = 0.23) +
    # Add region annotations - positioned to match ridge plot style
    # spacer
    annotate("text", x = 1900, y = 10.201, 
             label = " ", size = 3, hjust = 0.5) +
    # Add simple plant region annotation
    annotate("rect", xmin = 2976, xmax = 2898, ymin = -Inf, ymax = Inf, 
             alpha = 0.12, fill = "#e3c8b1") +
    annotate("rect", xmin = 2870, xmax = 2839, ymin = -Inf, ymax = Inf, 
             alpha = 0.12, fill = "#e3c8b1") +
    annotate("text", x = 2560, y = text_y, 
             label = "Simple Plant", size = 3.3, hjust = 0.5) +
    # Add microbial region annotation
    annotate("rect", xmin = 1660, xmax = 1580, ymin = -Inf, ymax = Inf, 
             alpha = 0.12, fill = "#a8cbad") +
    annotate("text", x = 1870, y = text_y, 
             label = "Microbial", size = 3.3, hjust = 0.5) +
    # Add complex plant region annotation
    annotate("rect", xmin = 1550, xmax = 1500, ymin = -Inf, ymax = Inf, 
             alpha = 0.12, fill = "#beb5d5") +
    annotate("text", x = 1200, y = text_y, 
             label = "Complex Plant", size = 3.3, hjust = 0.5)

  return(list(plot = line_plot, data = filtered_data))
}

# Generic function to create sample count tables
create_sample_count_table <- function(filtered_data, timepoint_week) {
  week_num <- str_extract(timepoint_week, "\\d+")
  col_name <- paste("Week", week_num, "samples")
  
  counts <- filtered_data %>%
    select(filename, crop) %>%
    distinct() %>%
    count(crop) %>%
    arrange(match(crop, c("noPlant", "wheat", "rice", "soy"))) %>%
    mutate(!!col_name := paste(n, "&nbsp;&nbsp;", crop)) %>%
    select(all_of(col_name))
  
  return(counts)
}

# Create plots and tables for each timepoint
timepoints <- sort(unique(dpt_data_roots$timepoint))

for (tp in timepoints) {
  # Create plot and get filtered data
  result <- create_timepoint_line_plot(dpt_data_roots, tp)
  
  # Print the plot
  print(result$plot)
  
  # Create and print sample count table with results='asis'
  counts_table <- create_sample_count_table(result$data, tp)
  cat(knitr::kable(counts_table, format = "html", escape = FALSE))
  cat("<br><br>")  # Add HTML line breaks for spacing
}
Week 0 samples
2    wheat
3    rice
2    soy


Week 10 samples
2    wheat
4    rice
3    soy


Week 30 samples
1    noPlant


Week 40 samples
1    noPlant
3    wheat
3    rice
3    soy



Plotting by crop

These plots are of normalized absorbance values. The thickness of the opaque line represents the range of the data. Annotations indicate integration regions used for calculations of simple plant, complex plant, and microbial organic matter proportions.

Simple plant (aliphatic): 2976 - 2898 cm-1 + 2870 - 2839 cm-1
Microbial: 1660 - 1580 cm-1
Complex plant (aromatic): 1550 - 1500 cm-1

Ridge plots

# Define crop-specific color palettes
crop_timepoint_colors <- list(
  wheat = c("wk0" = "#B4D586", "wk10" = "#98C559", "wk20" = "#79A63A",
            "wk30" = "#58792A", "wk40" = "#374B1B"),
  rice = c("wk0" = "#FDB35E", "wk10" = "#FC9D33", "wk29" = "#F18204",
           "wk40" = "#B56203"),
  soy = c("wk0" = "#FE71B1", "wk10" = "#FE318E", "wk20" = "#F4016E",
          "wk30" = "#B70153", "wk40" = "#7A0137")
)

# Generic function to create crop-specific DRIFTS plots
create_crop_plot <- function(data, crop_name, color_palette) {
  # Filter data for specific crop
  crop_data <- data %>%
    group_by(timepoint) %>%
    filter(crop == crop_name)
  
  # Ensure absorbance is numeric
  crop_data$normalized_absorbance <- as.numeric(crop_data$normalized_absorbance)
  
  # Create the plot
  crop_plot <- ggplot(crop_data,
                      aes(x = wavenumber,
                          y = normalized_absorbance,
                          color = timepoint)) +
    xlim(3777, 422) +
    geom_line() +
    theme_classic() +
    scale_color_manual(values = color_palette) +
    ggtitle(paste(str_to_title(crop_name), "Roots DRIFTS")) +
    labs(x = expression("wavenumber (cm"^-1*")"), 
         y = "normalized absorbance") +
    geom_vline(xintercept = 1500, linetype = "dotted", linewidth = 0.3) +
    geom_vline(xintercept = 1550, linetype = "dotted", linewidth = 0.3) +
    geom_vline(xintercept = 1580, linetype = "dashed", linewidth = 0.23) +
    geom_vline(xintercept = 1660, linetype = "dashed", linewidth = 0.23) +
    geom_vline(xintercept = 2839, linetype = "longdash", linewidth = 0.23) +
    geom_vline(xintercept = 2870, linetype = "longdash", linewidth = 0.23) +
    geom_vline(xintercept = 2898, linetype = "longdash", linewidth = 0.23) +
    geom_vline(xintercept = 2976, linetype = "longdash", linewidth = 0.23) +
    # Add simple plant region annotation
    annotate("rect", xmin = 2976, xmax = 2898, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#e3c8b1") +
    annotate("rect", xmin = 2870, xmax = 2839, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#e3c8b1") +
    annotate("text", x = 2550, y = max(crop_data$normalized_absorbance), label = "Simple Plant", 
             size = 3.3, hjust = 0.5) +
    # Add microbial region annotation
    annotate("rect", xmin = 1660, xmax = 1580, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#a8cbad") +
    annotate("text", x = 1850, y = max(crop_data$normalized_absorbance), label = "Microbial", 
             size = 3.3, hjust = 0.5) +
    # Add complex plant region annotation
    annotate("rect", xmin = 1550, xmax = 1500, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#beb5d5") +
    annotate("text", x = 1200, y = max(crop_data$normalized_absorbance)+0.02, label = "Complex Plant", 
             size = 3.3, hjust = 0.5)
  
  return(list(plot = crop_plot, data = crop_data))
}

# Generic function to create crop ID tables
create_crop_id_table <- function(crop_data, crop_name) {
  combinations <- crop_data %>%
    select(ID, timepoint) %>%
    distinct() %>%
    arrange(ID, timepoint) %>%
    group_by(timepoint) %>%
    summarise(pot_ids = {
      ids <- paste(ID, sep = "")
      # Group every 3 items and join with line breaks
      grouped <- split(ids, ceiling(seq_along(ids)/5))
      lines <- sapply(grouped, function(x) paste(x, collapse = ",&ensp;"))
      paste(lines, collapse = "<br>")
    }, .groups = 'drop') %>%
    pivot_wider(names_from = timepoint, values_from = pot_ids)
  
  return(combinations)
}

# Generate plots and tables for each crop
crops <- c("wheat", "rice", "soy")

for (crop in crops) {
  # Create plot and get data
  result <- create_crop_plot(dpt_data_roots, crop, crop_timepoint_colors[[crop]])
  
  # Print the plot
  print(result$plot)
  cat("<br>")  # Add space between plot and table
  
  # Create and print ID combinations table
  id_table <- create_crop_id_table(result$data, crop)
  cat(paste0("<h4>", str_to_title(crop), " pot root samples</h4>"))
  cat(knitr::kable(id_table, format = "html", escape = FALSE, table.attr = 'style="border-collapse: separate; border-spacing: 35px 0;"'))
  cat("<br><br>")  # Add HTML line breaks for spacing
}

Wheat pot root samples

wk0 wk10 wk40
001, 032 029, 053 029, 053, 109



Rice pot root samples

wk0 wk10 wk40
103, 107 055, 087, 119 055, 087, 119



Soy pot root samples

wk0 wk10 wk40
050, 062 012, 080, 094 030, 080, 094



Line plots

# Generic function to create line plots for any crop with suberin annotations
# Define crop-specific color palettes
crop_timepoint_colors <- list(
  noPlant = c("wk0" = "#B68ADE", "wk10" = "#9C6BC7", "wk20" = "#7B41A9",
              "wk30" = "#6f3e94", "wk40" = "#3F1C59"),
  wheat = c("wk0" = "#B4D586", "wk10" = "#98C559", "wk20" = "#79A63A",
            "wk30" = "#58792A", "wk40" = "#374B1B"),
  rice = c("wk0" = "#FDB35E", "wk10" = "#FC9D33", "wk29" = "#F18204",
           "wk40" = "#B56203"),
  soy = c("wk0" = "#FE71B1", "wk10" = "#FE318E", "wk20" = "#F4016E",
          "wk30" = "#B70153", "wk40" = "#7A0137")
)

create_crop_line_plot <- function(data, crop_name, color_palette) {
 # Filter data for specific crop
  crop_data <- data %>%
    group_by(timepoint) %>%
    filter(crop == crop_name)
  
  # Ensure absorbance is numeric
  crop_data$normalized_absorbance <- as.numeric(crop_data$normalized_absorbance)
  
  # Create the plot
  # Scale the data to match ridge plot proportions
  max_abs <- max(crop_data$normalized_absorbance, na.rm = TRUE)
  line_plot <- crop_data %>%
    ggplot(mapping = aes(x = wavenumber,
                         y = normalized_absorbance,
                         group = filename,
                         color = timepoint)) +
    xlim(3777, 422) +
    geom_line(alpha = 0.8) +
    theme_classic() +
    scale_color_manual(values = color_palette) +
    ggtitle(paste(str_to_title(crop_name), "roots DRIFTS")) +
    labs(x = expression("wavenumber (cm"^-1*")"), 
         y = "normalized absorbance") +
# annotations
    geom_vline(xintercept = 1500, linetype = "dotted", linewidth = 0.3) +
    geom_vline(xintercept = 1550, linetype = "dotted", linewidth = 0.3) +
    geom_vline(xintercept = 1580, linetype = "dashed", linewidth = 0.23) +
    geom_vline(xintercept = 1660, linetype = "dashed", linewidth = 0.23) +
    geom_vline(xintercept = 2839, linetype = "longdash", linewidth = 0.23) +
    geom_vline(xintercept = 2870, linetype = "longdash", linewidth = 0.23) +
    geom_vline(xintercept = 2898, linetype = "longdash", linewidth = 0.23) +
    geom_vline(xintercept = 2976, linetype = "longdash", linewidth = 0.23) +
    # Add simple plant region annotation
    annotate("rect", xmin = 2976, xmax = 2898, ymin = -Inf, ymax = Inf, 
             alpha = 0.12, fill = "#e3c8b1") +
    annotate("rect", xmin = 2870, xmax = 2839, ymin = -Inf, ymax = Inf, 
             alpha = 0.12, fill = "#e3c8b1") +
    annotate("text", x = 2550, y = max(crop_data$normalized_absorbance), label = "Simple Plant", 
             size = 3.3, hjust = 0.5) +
    # Add microbial region annotation
    annotate("rect", xmin = 1660, xmax = 1580, ymin = -Inf, ymax = Inf, 
             alpha = 0.12, fill = "#a8cbad") +
    annotate("text", x = 1850, y = max(crop_data$normalized_absorbance), label = "Microbial", 
             size = 3.3, hjust = 0.5) +
    # Add complex plant region annotation
    annotate("rect", xmin = 1550, xmax = 1500, ymin = -Inf, ymax = Inf, 
             alpha = 0.12, fill = "#beb5d5") +
    annotate("text", x = 1200, y = max(crop_data$normalized_absorbance)+0.02, label = "Complex Plant", 
             size = 3.3, hjust = 0.5)

  return(list(plot = line_plot, data = crop_data))
}

# Generate plots and tables for each crop
crops <- c("noPlant", "wheat", "rice", "soy")

for (crop in crops) {
  # Create plot and get data
  result <- create_crop_line_plot(dpt_data_roots, crop, crop_timepoint_colors[[crop]])
  
  # Print the plot
  print(result$plot)
  cat("<br>")  # Add space between plot and table
  
  # Create and print ID combinations table
  id_table <- create_crop_id_table(result$data, crop)
  cat(paste0("<h4>", str_to_title(crop), " pot root samples</h4>"))
  cat(knitr::kable(id_table, format = "html", escape = FALSE, table.attr = 'style="border-collapse: separate; border-spacing: 35px 0;"'))
  cat("<br><br>")  # Add HTML line breaks for spacing
}

Noplant pot root samples

wk30 wk40
052 052



Wheat pot root samples

wk0 wk10 wk40
001, 032 029, 053 029, 053, 109



Rice pot root samples

wk0 wk10 wk40
103, 107 055, 087, 119 055, 087, 119



Soy pot root samples

wk0 wk10 wk40
050, 062 012, 080, 094 030, 080, 094



Combined stacked overlay plot

These chunks create a single visualization showing all crops and timepoints by stacking the overlay plots vertically with a shared y-axis. Each crop has its own color palette and legend.

Combined stacked overlay plot (without noPlant)

This version excludes noPlant and uses the original grouping method where line thickness represents the range of data for each timepoint.

# Define crop-specific color palettes (exclude noPlant)
crop_timepoint_colors_three <- list(
  wheat = c("wk0" = "#B4D586", "wk10" = "#98C559", "wk40" = "#374B1B"),
  rice = c("wk0" = "#FDB35E", "wk10" = "#FC9D33", "wk40" = "#B56203"),
  soy = c("wk0" = "#FE71B1", "wk10" = "#FE318E", "wk40" = "#7A0137")
)

# Prepare data with three crops only (ordered from bottom to top: wheat, rice, soy)
crops_ordered_three <- c("soy", "rice", "wheat")  # Reversed for plotting so soy appears at top
three_crops_data <- dpt_data_roots %>%
  filter(crop %in% crops_ordered_three) %>%
  mutate(crop = factor(crop, levels = crops_ordered_three)) %>%
  mutate(normalized_absorbance = as.numeric(normalized_absorbance))

# Calculate vertical offset for each crop to create stacking effect
offset_amount_three <- 0.7  # Space between each crop's spectra
crop_offsets_three <- setNames(seq(0, length(crops_ordered_three) - 1) * offset_amount_three, crops_ordered_three)

# Add offset to absorbance values based on crop
three_crops_data <- three_crops_data %>%
  mutate(offset_absorbance = normalized_absorbance + crop_offsets_three[as.character(crop)])

# Determine max y position for annotations
max_y_three <- max(three_crops_data$offset_absorbance, na.rm = TRUE)

# Create the base plot with functional region annotations
base_plot_three <- ggplot() +
  xlim(3777, 422) +
  theme_classic() +
  labs(x = expression("wavenumber (cm"^-1*")"), 
       y = "normalized absorbance") +
  theme(
    axis.text.y = element_blank(),
    # axis.ticks.y = element_blank(),
    axis.title.y = element_text(size = 14.5),
    axis.text.x = element_text(size = 14.5, face = "bold"),
    axis.title.x = element_text(size = 15.5),
    legend.position = "right",
    legend.justification = c(0, 0.07),  # (x, y) position; 0,0 is bottom-right
    legend.box = "vertical",
    legend.spacing.y = unit(2.8, "cm"),
    legend.title = element_text(size = 15, face = "bold"),
    legend.text = element_text(size = 14.5),
    plot.margin = margin(7, 14, 7, 7, "pt")  # (top, right, bottom, left)
  ) +
  scale_y_continuous(
    breaks = crop_offsets_three + 0.003, # y-tick positions
    labels = NULL,
    expand = expansion(mult = c(0.003, 0.015)) # (bottom padding, top padding)
  ) +
  scale_x_reverse(
    limits = c(3770, 415),
    breaks = seq(3500, 500, -500),  # Ticks every 500 from 3500 down to 500
    expand = c(0.002, 0.003)  # Remove padding on x-axis
  ) +
  # Add vertical lines for functional regions
  geom_vline(xintercept = 1500, linetype = "dotted", linewidth = 0.46) +
  geom_vline(xintercept = 1550, linetype = "dotted", linewidth = 0.46) +
  geom_vline(xintercept = 1580, linetype = "dashed", linewidth = 0.23) +
  geom_vline(xintercept = 1660, linetype = "dashed", linewidth = 0.23) +
  geom_vline(xintercept = 2839, linetype = "longdash", linewidth = 0.23) +
  geom_vline(xintercept = 2870, linetype = "longdash", linewidth = 0.23) +
  geom_vline(xintercept = 2898, linetype = "longdash", linewidth = 0.23) +
  geom_vline(xintercept = 2976, linetype = "longdash", linewidth = 0.23) +
  # Add shaded regions with annotations
  annotate("rect", xmin = 2976, xmax = 2898, ymin = -Inf, ymax = Inf, 
           alpha = 0.12, fill = "#e3c8b1") +
  annotate("rect", xmin = 2870, xmax = 2839, ymin = -Inf, ymax = Inf, 
           alpha = 0.12, fill = "#e3c8b1") +
  annotate("text", x = 2650, y = max_y_three - 0.05, label = "Simple Plant", 
           size = 4.8, hjust = 0.5) +
  annotate("rect", xmin = 1660, xmax = 1580, ymin = -Inf, ymax = Inf, 
           alpha = 0.12, fill = "#a8cbad") +
  annotate("text", x = 1800, y = max_y_three, label = "Microbial", 
           size = 4.8, hjust = 0.5) +
  annotate("rect", xmin = 1550, xmax = 1500, ymin = -Inf, ymax = Inf, 
           alpha = 0.12, fill = "#beb5d5") +
  annotate("text", x = 1300, y = max_y_three, label = "Complex Plant", 
           size = 4.8, hjust = 0.5)

# Add all crop layers without automatic legends, explicitly coloring each line
combined_overlay_plot_no_noplant <- base_plot_three

for (crop in crops_ordered_three) {
  crop_data <- three_crops_data %>% filter(crop == !!crop)
  
  for (tp in names(crop_timepoint_colors_three[[crop]])) {
    tp_data <- crop_data %>% filter(timepoint == tp)
    
    combined_overlay_plot_no_noplant <- combined_overlay_plot_no_noplant +
      geom_line(data = tp_data,
                aes(x = wavenumber, y = offset_absorbance),
                color = crop_timepoint_colors_three[[crop]][tp],
                linewidth = 0.5,
                show.legend = FALSE)
  }
}

# Create dummy data for manual legends - one for each crop
dummy_wheat <- data.frame(
  x = rep(3777, 3), y = rep(0, 3),
  wheat_tp = names(crop_timepoint_colors_three$wheat)
)

dummy_rice <- data.frame(
  x = rep(3777, 3), y = rep(0, 3),
  rice_tp = names(crop_timepoint_colors_three$rice)
)

dummy_soy <- data.frame(
  x = rep(3777, 3), y = rep(0, 3),
  soy_tp = names(crop_timepoint_colors_three$soy)
)

# Add invisible layers with separate aesthetics for each crop
combined_overlay_plot_no_noplant <- combined_overlay_plot_no_noplant +
  # Wheat legend
  geom_line(data = dummy_wheat,
            aes(x = x, y = y, color = wheat_tp),
            linewidth = 0, alpha = 0) +
  scale_color_manual(
    name = "Wheat",
    values = crop_timepoint_colors_three$wheat,
    breaks = names(crop_timepoint_colors_three$wheat),
    labels = names(crop_timepoint_colors_three$wheat),
    guide = guide_legend(
      override.aes = list(linewidth = 2, alpha = 1),
      order = 1
    )
  ) +
  ggnewscale::new_scale_color() +
  # Rice legend
  geom_line(data = dummy_rice,
            aes(x = x, y = y, color = rice_tp),
            linewidth = 0, alpha = 0) +
  scale_color_manual(
    name = "Rice",
    values = crop_timepoint_colors_three$rice,
    breaks = names(crop_timepoint_colors_three$rice),
    labels = names(crop_timepoint_colors_three$rice),
    guide = guide_legend(
      override.aes = list(linewidth = 2, alpha = 1),
      order = 2
    )
  ) +
  ggnewscale::new_scale_color() +
  # Soy legend
  geom_line(data = dummy_soy,
            aes(x = x, y = y, color = soy_tp),
            linewidth = 0, alpha = 0) +
  scale_color_manual(
    name = "Soy",
    values = crop_timepoint_colors_three$soy,
    breaks = names(crop_timepoint_colors_three$soy),
    labels = names(crop_timepoint_colors_three$soy),
    guide = guide_legend(
      override.aes = list(linewidth = 2, alpha = 1),
      order = 3
    )
  )

print(combined_overlay_plot_no_noplant)

Comparing regions of interest

Modified from “Berhe_Ghezzehei_Lab/DRIFTS Data Analysis/Visualizing_FTIR_w_Elemental_Data.R” which starts by calculating aliphatic and aromatic indices, then deriving simple and complex plant and microbial proportions before visualizing everything. This procedure sums non-normalized absorbance values over a specified range, and divides by the total peak area in the ranges of interest to get the proportions.

Integrating spectral regions

# Function to perform spectral integration on DPT data (based on Leila_code_modified.Rmd)
perform_spectral_integration <- function(dpt_data) {
  cat("Starting spectral integration processing...\n")
  
  # Load required library for integration
  if (!require(pracma, quietly = TRUE)) {
    install.packages("pracma")
    library(pracma)
  }
  
  # Prepare the data in the format needed for integration
  # Rename columns to match expected format
  comb <- dpt_data %>%
    select(filename, wavenumber, absorbance) %>%
    rename(sample = filename, wavelength = wavenumber, abs = absorbance)
  
  # Define spectral windows of interest (same as Leila's code)
  red1 <- comb[comb$wavelength > 2839 & comb$wavelength < 2870, ]  # Aliphatic 1
  red2 <- comb[comb$wavelength > 2898 & comb$wavelength < 2976, ]  # Aliphatic 2  
  red3 <- comb[comb$wavelength > 1580 & comb$wavelength < 1660, ]  # Microbial
  red4 <- comb[comb$wavelength > 1500 & comb$wavelength < 1550, ]  # Complex plant
  
  # Get unique samples
  sample <- unique(comb$sample)
  
  # Create integration results data frame
  ints <- data.frame(
    sample = sample,
    int_2839_2870 = numeric(length(sample)),
    int_2898_2976 = numeric(length(sample)),
    int_1580_1660 = numeric(length(sample)),
    int_1500_1550 = numeric(length(sample)),
    stringsAsFactors = FALSE
  )
  
  # Calculate integrated areas for each spectral window and sample
  cat("Processing", length(sample), "samples for integration...\n")
  for (i in seq_len(nrow(ints))) {
    current_sample <- sample[i]
    
    # Extract data for current sample from each spectral window
    sample_red1 <- red1[red1$sample == current_sample, ]
    sample_red2 <- red2[red2$sample == current_sample, ]
    sample_red3 <- red3[red3$sample == current_sample, ]
    sample_red4 <- red4[red4$sample == current_sample, ]
    
    # Calculate area under curve (integration) using trapezoidal rule
    if (nrow(sample_red1) > 1) {
      ints$int_2839_2870[i] <- pracma::trapz(sample_red1$wavelength, sample_red1$abs)
    }
    
    if (nrow(sample_red2) > 1) {
      ints$int_2898_2976[i] <- pracma::trapz(sample_red2$wavelength, sample_red2$abs)
    }
    
    if (nrow(sample_red3) > 1) {
      ints$int_1580_1660[i] <- pracma::trapz(sample_red3$wavelength, sample_red3$abs)
    }
    
    if (nrow(sample_red4) > 1) {
      ints$int_1500_1550[i] <- pracma::trapz(sample_red4$wavelength, sample_red4$abs)
    }
  }
  
  # Use existing metadata from the input data (no need to re-extract)
  cat("Using existing metadata from DPT data...\n")
  basic_meta <- dpt_data %>%
    select(filename, crop, timepoint, sample_type, ID) %>%
    distinct() %>%
    rename(sample = filename, type = sample_type)
  
  # Combine metadata with integration results
  ftir_data <- merge(basic_meta, ints, by = "sample", all = TRUE)
  
  # Rename integration columns to match expected names
  names(ftir_data)[names(ftir_data) == "int_1580_1660"] <- "int_microbial"
  names(ftir_data)[names(ftir_data) == "int_1500_1550"] <- "int_complex_plant"
  
  # Calculate derived metrics (same as original)
  ftir_data$aliphatic <- ftir_data$int_2839_2870 + ftir_data$int_2898_2976
  ftir_data$total_peak_area <- ftir_data$aliphatic + ftir_data$int_microbial + ftir_data$int_complex_plant
  ftir_data$simple_plant_prop <- ftir_data$aliphatic / ftir_data$total_peak_area
  ftir_data$complex_plant_prop <- ftir_data$int_complex_plant / ftir_data$total_peak_area
  ftir_data$microbial_prop <- ftir_data$int_microbial / ftir_data$total_peak_area
  ftir_data$simple_microbial_prop <- ftir_data$simple_plant_prop / ftir_data$microbial_prop
  ftir_data$complex_microbial_prop <- ftir_data$complex_plant_prop / ftir_data$microbial_prop
  
  # Remove individual aliphatic integration columns (keep only derived metrics)
  ftir_data <- ftir_data %>%
    select(-int_2839_2870, -int_2898_2976)
  
  # Save the processed data to the output directory
  today <- format(Sys.Date(), "%Y-%m-%d")
  output_file <- file.path(outputs_folder, paste0("processed_int_data_", today, ".csv"))
  write.csv(ftir_data, file = output_file, row.names = FALSE)
  cat("Saved processed integration data to:", output_file, "\n")
  
  cat("Integration processing completed successfully.\n")
  return(ftir_data)
}

Generating statistics

# Try to find the most recent processed integration data file
ftir_file <- find_most_recent_processed_file(integrated_ftir_data)
## Found 10 processed integration files
## Using most recent file: processed_int_data_2025-10-23.csv
if (!is.null(ftir_file) && file.exists(ftir_file)) {
  ftir_data <- read_csv(ftir_file, show_col_types = FALSE)
  cat("Successfully loaded FTIR integration data from:", basename(ftir_file), "\n")
  cat("Dimensions:", dim(ftir_data), "\n")
  cat("Columns:", names(ftir_data), "\n")
} else {
  cat("No processed integration data found. Processing raw DPT data to generate integration results...\n")
  
  # Check if we have the required dpt_data_roots from previous chunk
  if (!exists("dpt_data_roots") || nrow(dpt_data_roots) == 0) {
    stop("No DPT data available for integration. Please ensure the DPT data import was successful.")
  }
  
  # Process raw DPT data using the integrated function
  ftir_data <- perform_spectral_integration(dpt_data_roots)
  
  # Verify the processing was successful
  if (is.null(ftir_data) || nrow(ftir_data) == 0) {
    stop("Failed to process DPT data for integration.")
  }
  
  cat("Successfully processed", nrow(ftir_data), "samples from DPT data\n")
  cat("Generated columns:", names(ftir_data), "\n")
}
## Successfully loaded FTIR integration data from: processed_int_data_2025-10-23.csv 
## Dimensions: 32 17 
## Columns: source ID isotope crop timepoint type notes run_date int_microbial int_complex_plant aliphatic total_peak_area simple_plant_prop complex_plant_prop microbial_prop simple_microbial_prop complex_microbial_prop
# Calculate summary statistics by group
crop_stats <- ftir_data %>%
  mutate(crop = factor(crop, levels = c("noPlant", "wheat", "rice", "soy"))) %>%
  group_by(crop) %>%
  summarise(
    Mean_simple_plant_prop = mean(simple_plant_prop, na.rm = TRUE),
    SE_simple_plant_prop = sd(simple_plant_prop, na.rm = TRUE) / sqrt(n()),
    Mean_complex_plant_prop = mean(complex_plant_prop, na.rm = TRUE),
    SE_complex_plant_prop = sd(complex_plant_prop, na.rm = TRUE) / sqrt(n()),
    Mean_microbial_prop = mean(microbial_prop, na.rm = TRUE),
    SE_microbial_prop = sd(microbial_prop, na.rm = TRUE) / sqrt(n()),
    .groups = 'drop'
  )

timepoint_stats <- ftir_data %>%
  group_by(timepoint) %>%
  summarise(
    Mean_simple_plant_prop = mean(simple_plant_prop, na.rm = TRUE),
    SE_simple_plant_prop = sd(simple_plant_prop, na.rm = TRUE) / sqrt(n()),
    Mean_complex_plant_prop = mean(complex_plant_prop, na.rm = TRUE),
    SE_complex_plant_prop = sd(complex_plant_prop, na.rm = TRUE) / sqrt(n()),
    Mean_microbial_prop = mean(microbial_prop, na.rm = TRUE),
    SE_microbial_prop = sd(microbial_prop, na.rm = TRUE) / sqrt(n()),
    .groups = 'drop'
  )

crop_timepoint_stats <- ftir_data %>%
  mutate(crop = factor(crop, levels = c("noPlant", "wheat", "rice", "soy"))) %>%
  group_by(crop, timepoint) %>%
  summarise(
    Mean_simple_plant_prop = mean(simple_plant_prop, na.rm = TRUE),
    SE_simple_plant_prop = sd(simple_plant_prop, na.rm = TRUE) / sqrt(n()),
    Mean_complex_plant_prop = mean(complex_plant_prop, na.rm = TRUE),
    SE_complex_plant_prop = sd(complex_plant_prop, na.rm = TRUE) / sqrt(n()),
    Mean_microbial_prop = mean(microbial_prop, na.rm = TRUE),
    SE_microbial_prop = sd(microbial_prop, na.rm = TRUE) / sqrt(n()),
    .groups = 'drop'
  )

Visualizing integrated ranges

This code is modified from “Visualizing_FTIR_w_Elemental_Data.R”

# Recreate the EXACT plotting function from Leila_code_modified.Rmd
create_proportion_plots <- function(stats_data, group_col, title_prefix) {
  # Choose colors based on grouping variable
  colors_to_use <- if(group_col == "crop") crop_colors else timepoint_colors
  
  p1 <- ggplot(data = stats_data, aes_string(x = group_col, y = "Mean_simple_plant_prop", fill = group_col)) +
    geom_col(alpha = 0.8) +
    geom_errorbar(aes_string(ymin = "Mean_simple_plant_prop - SE_simple_plant_prop", 
                            ymax = "Mean_simple_plant_prop + SE_simple_plant_prop"),
                  width = 0.2) +
    theme_classic() +
    scale_fill_manual(values = colors_to_use) +
    labs(title = paste0(title_prefix, "Simple Plant OM"),
         x = str_to_title(group_col),
         y = "Simple Plant OM Proportion") +
    theme(legend.position = "none",
          axis.text.x = element_text(angle = 45, hjust = 1),
          plot.margin = margin(20, 20, 20, 20),
          axis.title = element_text(size = 12),
          plot.title = element_text(size = 10))

  p2 <- ggplot(data = stats_data, aes_string(x = group_col, y = "Mean_complex_plant_prop", fill = group_col)) +
    geom_col(alpha = 0.8) +
    geom_errorbar(aes_string(ymin = "Mean_complex_plant_prop - SE_complex_plant_prop", 
                            ymax = "Mean_complex_plant_prop + SE_complex_plant_prop"),
                  width = 0.2) +
    theme_classic() +
    scale_fill_manual(values = colors_to_use) +
    labs(title = paste0(title_prefix, "Complex Plant OM"),
         x = str_to_title(group_col),
         y = "Complex Plant OM Proportion") +
    theme(legend.position = "none",
          axis.text.x = element_text(angle = 45, hjust = 1),
          plot.margin = margin(20, 20, 20, 20),
          axis.title = element_text(size = 12),
          plot.title = element_text(size = 10))

  p3 <- ggplot(data = stats_data, aes_string(x = group_col, y = "Mean_microbial_prop", fill = group_col)) +
    geom_col(alpha = 0.8) +
    geom_errorbar(aes_string(ymin = "Mean_microbial_prop - SE_microbial_prop", 
                            ymax = "Mean_microbial_prop + SE_microbial_prop"),
                  width = 0.2) +
    theme_classic() +
    scale_fill_manual(values = colors_to_use) +
    labs(title = paste0(title_prefix, "Microbial OM"),
         x = str_to_title(group_col),
         y = "Microbial OM Proportion") +
    theme(legend.position = "none",
          axis.text.x = element_text(angle = 45, hjust = 1),
          plot.margin = margin(20, 20, 20, 20),
          axis.title = element_text(size = 12),
          plot.title = element_text(size = 10))

  list(p1 = p1, p2 = p2, p3 = p3)
}

# Create EXACT plots by crop (from Leila_code_modified.Rmd)
crop_plots <- create_proportion_plots(crop_stats, "crop", "")
crop_combined <- plot_grid(crop_plots$p1, crop_plots$p2, crop_plots$p3, nrow = 1, labels = "auto")
print(crop_combined)

# Create EXACT plots by timepoint (from Leila_code_modified.Rmd)
timepoint_plots <- create_proportion_plots(timepoint_stats, "timepoint", "")
timepoint_combined <- plot_grid(timepoint_plots$p1, timepoint_plots$p2, timepoint_plots$p3, nrow = 1, labels = "auto")
print(timepoint_combined)

# SIMPLE PLANT PROPORTION INTERACTION PLOT
# Recreate simple plant proportion interaction plot (crop x timepoint) from Leila_code_modified.Rmd
interaction_plot <- ggplot(crop_timepoint_stats) +
  geom_col(aes(x = crop, y = Mean_simple_plant_prop, fill = timepoint),
           position = "dodge", alpha = 0.8) +
  geom_errorbar(aes(x = crop,
                    ymin = Mean_simple_plant_prop - SE_simple_plant_prop,
                    ymax = Mean_simple_plant_prop + SE_simple_plant_prop,
                    group = timepoint),
                position = position_dodge(0.9), width = 0.2) +
  theme_classic() +
  scale_fill_manual("Timepoint", values = timepoint_colors) +
  labs(title = "Simple Plant OM by Crop and Timepoint",
       x = "Crop Type",
       y = "Simple Plant OM Proportion") +
  theme(legend.position = "top",
        plot.margin = margin(20, 20, 20, 20),
        axis.text.x = element_text(angle = 45, hjust = 1))

print(interaction_plot)

# COMPLEX PLANT PROPORTION INTERACTION PLOT
CP_interaction_plot <- ggplot(crop_timepoint_stats) +
  geom_col(aes(x = crop, y = Mean_complex_plant_prop, fill = timepoint),
           position = "dodge", alpha = 0.8) +
  geom_errorbar(aes(x = crop,
                    ymin = Mean_complex_plant_prop - SE_complex_plant_prop,
                    ymax = Mean_complex_plant_prop + SE_complex_plant_prop,
                    group = timepoint),
                position = position_dodge(0.9), width = 0.2) +
  theme_classic() +
  scale_fill_manual("Timepoint", values = timepoint_colors) +
  labs(title = "Complex Plant OM by Crop and Timepoint",
       x = "Crop Type",
       y = "Complex Plant OM Proportion") +
  theme(legend.position = "top",
        plot.margin = margin(20, 20, 20, 20),
        axis.text.x = element_text(angle = 45, hjust = 1))

print(CP_interaction_plot)

# MICROBIAL PROPORTION INTERACTION PLOT
micr_interaction_plot <- ggplot(crop_timepoint_stats) +
  geom_col(aes(x = crop, y = Mean_microbial_prop, fill = timepoint),
           position = "dodge", alpha = 0.8) +
  geom_errorbar(aes(x = crop,
                    ymin = Mean_microbial_prop - SE_microbial_prop,
                    ymax = Mean_microbial_prop + SE_microbial_prop,
                    group = timepoint),
                position = position_dodge(0.9), width = 0.2) +
  theme_classic() +
  scale_fill_manual("Timepoint", values = timepoint_colors) +
  labs(title = "Microbial OM by Crop and Timepoint",
       x = "Crop Type",
       y = "Microbial OM Proportion") +
  theme(legend.position = "top",
        plot.margin = margin(20, 20, 20, 20),
        axis.text.x = element_text(angle = 45, hjust = 1))

print(micr_interaction_plot)

# STACKED BAR PLOTS
# Recreate stacked bar plots from Leila_code_modified.Rmd
create_stacked_data <- function(stats_data, group_col) {
  # Reshape data for stacking (exact reproduction)
  long_data <- stats_data %>%
    select(all_of(group_col), Mean_simple_plant_prop, Mean_complex_plant_prop, Mean_microbial_prop) %>%
    pivot_longer(cols = starts_with("Mean_"),
                 names_to = "func_type",
                 values_to = "proportion") %>%
    mutate(func_type = case_when(
      func_type == "Mean_simple_plant_prop" ~ "Simple Plant",
      func_type == "Mean_complex_plant_prop" ~ "Complex Plant",
      func_type == "Mean_microbial_prop" ~ "Microbial"
    )) %>%
    mutate(func_type = factor(func_type, levels = c("Complex Plant", "Simple Plant", "Microbial")))

  long_data
}

# Create stacked plots
crop_stacked_data <- create_stacked_data(crop_stats, "crop")
crop_stacked_data$crop <- factor(crop_stacked_data$crop, levels = c("noPlant", "wheat", "rice", "soy"))
timepoint_stacked_data <- create_stacked_data(timepoint_stats, "timepoint")

p_crop_stacked <- ggplot(crop_stacked_data, aes(x = crop, y = proportion * 100, fill = func_type)) +
  geom_col(position = "stack") +
  scale_fill_manual(values = c("#93deed", "#d2f0f4", "#1ca5cf"),
                    breaks = c("Microbial", "Simple Plant", "Complex Plant")) +
  theme_classic() +
  labs(x = "Crop Type",
       y = "Functional Group Amount (%)",
       fill = "") +
  theme(legend.position = "top",
        plot.margin = margin(20, 27, 20, 13),
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.margin = margin(10, 16, 10, 8))

p_timepoint_stacked <- ggplot(timepoint_stacked_data, aes(x = timepoint, y = proportion * 100, fill = func_type)) +
  geom_col(position = "stack") +
  scale_fill_manual(values = c("#e8aa9c", "#f5d8d1", "#c54e30"),
                    breaks = c("Microbial", "Simple Plant", "Complex Plant")) +
  theme_classic() +
  labs(x = "Timepoint",
       y = "Functional Group Amount (%)",
       fill = "") +
  theme(legend.position = "top",
        plot.margin = margin(20, 27, 20, 13),
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.margin = margin(10, 16, 10, 8))

combined_stacked <- plot_grid(
  p_crop_stacked,
  p_timepoint_stacked,
  nrow = 1,
  labels = "auto"
)
print(combined_stacked)

Stacked interaction plots (crop x timepoint)

timepoint_prop_colors <- list(
  "wk0" = c("Simple Plant" = "#8a82c0", "Microbial" = "#bdb8e0", "Complex Plant" = "#50478A"),
  "wk10" = c("Simple Plant" = "#7ba7c6", "Microbial" = "#b8d0e0", "Complex Plant" = "#3D6988"),
  "wk20" = c("Simple Plant" = "#c5aa7c", "Microbial" = "#e0d1b8", "Complex Plant" = "#876B3D"),
  "wk30" = c("Simple Plant" = "#7cc5af", "Microbial" = "#b8e0d4", "Complex Plant" = "#3D8770"),
  "wk40" = c("Simple Plant" = "#b4c57c", "Microbial" = "#d6e0b8", "Complex Plant" = "#75873D")
)

# Stacked interaction plot - crop x timepoint with all three proportions
# Create stacked data for interaction plot
interaction_stacked_data <- crop_timepoint_stats %>%
  select(crop, timepoint, Mean_simple_plant_prop, Mean_complex_plant_prop, Mean_microbial_prop) %>%
  pivot_longer(cols = starts_with("Mean_"),
               names_to = "func_type",
               values_to = "proportion") %>%
  mutate(func_type = case_when(
    func_type == "Mean_simple_plant_prop" ~ "Simple Plant",
    func_type == "Mean_microbial_prop" ~ "Microbial",
    func_type == "Mean_complex_plant_prop" ~ "Complex Plant"
  )) %>%
  mutate(func_type = factor(func_type, levels = c("Simple Plant", "Microbial", "Complex Plant")))

# Create a combined group for positioning (crop:timepoint)
interaction_stacked_data <- interaction_stacked_data %>%
  mutate(crop_time = interaction(crop, timepoint, sep = "_"))

# Apply colors - using average of timepoint colors for each functional group
# Or better - use timepoint-specific colors from timepoint_prop_colors
# Create fill based on timepoint:func_type combination
interaction_stacked_data <- interaction_stacked_data %>%
  mutate(fill_group = paste(timepoint, func_type, sep = "_"))

interaction_plot_stacked <- ggplot(interaction_stacked_data, 
                                   aes(x = interaction(crop, timepoint), 
                                       y = proportion * 100, 
                                       fill = fill_group)) +
  geom_col(position = "stack") +
  facet_grid(. ~ crop, scales = "free_x", space = "free_x") +
  scale_x_discrete(labels = function(x) {
    # Extract just the timepoint part (after the dot)
    sapply(strsplit(as.character(x), "\\."), function(parts) parts[2])
  }) +
  scale_y_continuous(expand = expansion(mult = c(0.01, 0.02))) +
  theme_classic() +
  labs(title = "",
       x = "",
       y = "Functional Group Proportion (%)",
       fill = "") +
  theme(legend.position = "right",
        plot.margin = margin(-2, 76, 0, 12), # (top, right, bottom, left)
        axis.text.x = element_text(angle = 45, hjust = .9, size = 12.5, face = "bold"),
        axis.title.y = element_text(size = 12),
        axis.text.y = element_text(size = 11),
        strip.background = element_blank(),
        strip.text = element_text(size = 14.5, face = "bold")) +
  coord_cartesian(clip = "off")

# Create manual color scale using timepoint_prop_colors
color_values <- c()
color_names <- c()
for (tp in c("wk0", "wk10", "wk20", "wk30", "wk40")) {
  for (ft in c("Simple Plant", "Microbial", "Complex Plant")) {
    color_names <- c(color_names, paste(tp, ft, sep = "_"))
    color_values <- c(color_values, timepoint_prop_colors[[tp]][ft])
  }
}
names(color_values) <- color_names

interaction_plot_stacked <- interaction_plot_stacked +
  scale_fill_manual(values = color_values,
                    labels = function(x) {
                      parts <- strsplit(x, "_")
                      sapply(parts, function(p) paste(p[1], p[2], sep = " - "))
                    }) +
  theme(legend.position = "none")

# Calculate label positions (midpoint of each segment in the rightmost bar)
# Get the rightmost crop-timepoint combination for labeling
label_data <- interaction_stacked_data %>%
  filter(crop == "soy" & timepoint == "wk40") %>%
  arrange(func_type) %>%
  mutate(
    # Calculate cumulative sum for positioning
    cum_prop = cumsum(proportion * 100),
    # Calculate midpoint of each segment
    label_y = cum_prop - (proportion * 100) / 2
  )

# Add labels on the right side
interaction_plot_stacked <- interaction_plot_stacked +
  geom_text(data = label_data,
            aes(x = interaction(crop, timepoint), 
                y = label_y, 
                label = func_type),
            hjust = 0, 
            nudge_x = 0.5,
            size = 3.8,
            angle = 10,
            fontface = "bold",
            inherit.aes = FALSE)

print(interaction_plot_stacked)

# Stacked interaction plot - crop x timepoint (without noPlant)
# Create stacked data for interaction plot
interaction_stacked_data_no_noPlant <- crop_timepoint_stats %>%
  filter(crop != "noPlant") %>%
  select(crop, timepoint, Mean_simple_plant_prop, Mean_complex_plant_prop, Mean_microbial_prop) %>%
  pivot_longer(cols = starts_with("Mean_"),
               names_to = "func_type",
               values_to = "proportion") %>%
  mutate(func_type = case_when(
    func_type == "Mean_simple_plant_prop" ~ "Simple Plant",
    func_type == "Mean_microbial_prop" ~ "Microbial",
    func_type == "Mean_complex_plant_prop" ~ "Complex Plant"
  )) %>%
  mutate(func_type = factor(func_type, levels = c("Simple Plant", "Microbial", "Complex Plant")))

# Create a combined group for positioning (crop:timepoint)
interaction_stacked_data_no_noPlant <- interaction_stacked_data_no_noPlant %>%
  mutate(crop_time = interaction(crop, timepoint, sep = "_"))

# Create fill based on timepoint:func_type combination
interaction_stacked_data_no_noPlant <- interaction_stacked_data_no_noPlant %>%
  mutate(fill_group = paste(timepoint, func_type, sep = "_"))

interaction_plot_stacked_no_noPlant <- ggplot(interaction_stacked_data_no_noPlant, 
                                   aes(x = interaction(crop, timepoint), 
                                       y = proportion * 100, 
                                       fill = fill_group)) +
  geom_col(position = "stack") +
  facet_grid(. ~ crop, scales = "free_x", space = "free_x") +
  scale_x_discrete(labels = function(x) {
    # Extract just the timepoint part (after the dot)
    sapply(strsplit(as.character(x), "\\."), function(parts) parts[2])
  }) +
  scale_y_continuous(expand = expansion(mult = c(0.01, 0.02))) +
  theme_classic() +
  labs(title = "",
       x = "",
       y = "Functional Group Proportion (%)",
       fill = "") +
  theme(legend.position = "right",
        plot.margin = margin(-2, 76, 0, 12), # (top, right, bottom, left)
        axis.text.x = element_text(angle = 45, hjust = .9, size = 12.5, face = "bold"),
        axis.title.y = element_text(size = 12),
        axis.text.y = element_text(size = 11),
        strip.background = element_blank(),
        strip.text = element_text(size = 14.5, face = "bold")) +
  coord_cartesian(clip = "off")

# Create manual color scale using timepoint_prop_colors
interaction_plot_stacked_no_noPlant <- interaction_plot_stacked_no_noPlant +
  scale_fill_manual(values = color_values,
                    labels = function(x) {
                      parts <- strsplit(x, "_")
                      sapply(parts, function(p) paste(p[1], p[2], sep = " - "))
                    }) +
  theme(legend.position = "none")

# Calculate label positions (midpoint of each segment for ALL bars)
# Calculate cumulative proportions for positioning percentage labels
label_data_no_noPlant <- interaction_stacked_data_no_noPlant %>%
  group_by(crop, timepoint) %>%
  arrange(func_type) %>%
  mutate(
    # Calculate cumulative sum for positioning
    cum_prop = cumsum(proportion * 100),
    # Calculate midpoint of each segment
    label_y = cum_prop - (proportion * 100) / 2,
    # Create percentage label
    pct_label = paste0(round(proportion * 100, 1), "%"),
    # Assign label color: black for Microbial (light), white for others (darker)
    # label_color = ifelse(func_type == "Microbial", "black", "white")
    label_color = ifelse(func_type == "Complex Plant", "white", "black")
  ) %>%
  ungroup()

# Create label data for func_type labels (rightmost bar only)
functype_label_data <- label_data_no_noPlant %>%
  filter(crop == "soy" & timepoint == "wk40")

# Add percentage labels to all bars
interaction_plot_stacked_no_noPlant <- interaction_plot_stacked_no_noPlant +
  geom_text(data = label_data_no_noPlant,
            aes(x = interaction(crop, timepoint), 
                y = label_y, 
                label = pct_label,
                color = label_color),
            size = 3.2,
            fontface = "plain",
            inherit.aes = FALSE) +
  scale_color_identity() +  # Use the actual color values from label_color
  # Add func_type labels on the right side
  geom_text(data = functype_label_data,
            aes(x = interaction(crop, timepoint), 
                y = label_y, 
                label = func_type),
            hjust = 0, 
            nudge_x = 0.5,
            size = 3.8,
            angle = 10,
            fontface = "bold",
            inherit.aes = FALSE)

print(interaction_plot_stacked_no_noPlant)

Plotting with suberin annotations

These ridge plots show the same spectral data as the original Ridge Plots section, but with annotations highlighting suberin-related peaks identified in the literature. The annotations indicate peaks associated with suberin content in plant roots, based on White et al. (2011, 2016).

Aliphatic suberin moiety: 2923 - 2930, 2854 - 2856 cm-1 (within 3000-2800 cm-1 range)
Suberin esters: 1737-1742 cm-1
Aromatic suberin moiety: 1506 cm-1 (also lignin)

White, K. E., Reeves, J. B., & Coale, F. J. (2011). Mid-infrared diffuse reflectance spectroscopy for the rapid analysis of plant root composition. Geoderma, 167–168, 197–203. https://doi.org/10.1016/j.geoderma.2011.08.009
or
White, K. E., Reeves, J. B., & Coale, F. J. (2016). Cell wall compositional changes during incubation of plant roots measured by mid-infrared diffuse reflectance spectroscopy and fiber analysis. Geoderma, 264, 205–213. https://doi.org/10.1016/j.geoderma.2015.10.018

Plotting by timepoint (with suberin annotations)

Ridge plots

# Generic function to create ridge plots with suberin annotations
create_suberin_ridge_plot <- function(data, timepoint_week, title = NULL) {
  # Filter data for specific timepoint
  filtered_data <- data %>%
    filter(timepoint == timepoint_week) %>%
    mutate(crop = factor(crop, levels = c("noPlant", "wheat", "rice", "soy")))
  
  # Create title if not provided
  if (is.null(title)) {
    week_num <- str_extract(timepoint_week, "\\d+")
    title <- paste("Root suberin at week", week_num)
  }
  
  # Determine y-position for annotations based on data
  max_y <- max(filtered_data$normalized_absorbance)*(length(unique(filtered_data$crop))+3)
  
  # Create the plot
  ridge_plot <- filtered_data %>%
    ggplot(mapping = aes(x = wavenumber,
                         y = crop,
                         height = normalized_absorbance,
                         group = crop,
                         fill = crop,
                         color = crop)) +
    geom_density_ridges(stat = "identity",
                        scale = 3,
                        alpha = 0.25) +
    
    # Suberin peak annotations
    # Aliphatic suberin region (3000-2850 cm-1)
    annotate("rect", xmin = 2825, xmax = 3000, ymin = -Inf, ymax = Inf, 
             alpha = 0.06, fill = "#5076ab") +
    # Specific aliphatic peaks
    # geom_vline(xintercept = 2923, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    # geom_vline(xintercept = 2930, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    # asymmetric CH2 stretch region (2930, 2925, 2919 cm-1)
    annotate("rect", xmin = 2922, xmax = 2932, ymin = -Inf, ymax = Inf, 
             alpha = 0.27, fill = "#5076ab") +
    # geom_vline(xintercept = 2854, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    # geom_vline(xintercept = 2856, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    annotate("rect", xmin = 2853, xmax = 2858, ymin = -Inf, ymax = Inf, 
             alpha = 0.27, fill = "#5076ab") +

    # Suberin esters (1737-1740 cm-1)
    annotate("rect", xmin = 1730, xmax = 1748, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#4d653a") +
    # geom_vline(xintercept = 1739, linetype = "dashed", linewidth = 0.23, color = "#4d653a") +

    # Suberin fingerprint (1464, 1240, 1174 cm-1)
    geom_vline(xintercept = 1464, linetype = "dotted", linewidth = 0.3, color = "#5f3e38") +
    geom_vline(xintercept = 1240, linetype = "dotted", linewidth = 0.3, color = "#5f3e38") +
    geom_vline(xintercept = 1174, linetype = "dotted", linewidth = 0.3, color = "#5f3e38") +

    xlim(3200, 1100) +
     scale_y_discrete(expand = expansion(mult = c(0.01, 0.56))) +  # Less space below, more above
    theme_classic() +
    theme(axis.text.y = element_text(size = 12),
          legend.position = "none") +
    scale_fill_manual(values = crop_colors) +
    scale_color_manual(values = crop_colors) +
    ggtitle(title) +
    labs(x = expression("wavenumber (cm"^-1*")"), y = "") +
   # spacer
    annotate("text", x = 1900, y = max_y + 0.2, label = " ", 
             size = 3, hjust = 0.5) +
    # Add aliphatic annotation
    annotate("text", x = 2626, y = max_y-0.2, label = "aliphatic suberin", 
             size = 3.3, hjust = 0.5) +
    # Add esters annotation
    annotate("text", x = 1900, y = max_y, label = "suberin esters", 
             size = 3.3, hjust = 0.5) +
    # Add fingerprint annotation
    annotate("label", x = 1310, y = max_y + 0.1, label = "suberin fingerprint", 
             size = 3.3, hjust = 0.5, fill = "#ffffff", label.size = NA)

  return(list(plot = ridge_plot, data = filtered_data))
}

# Get unique timepoints and sort them (use dpt_data_roots, not ftir_data)
timepoints <- sort(unique(dpt_data_roots$timepoint))

# Create plots and tables for each timepoint
timepoints <- sort(unique(dpt_data_roots$timepoint))

for (tp in timepoints) {
  # Create plot and get filtered data
  result <- create_suberin_ridge_plot(dpt_data_roots, tp)
  
  # Print the plot
  print(result$plot)
  
  # Create and print sample count table with results='asis'
  counts_table <- create_sample_count_table(result$data, tp)
  cat(knitr::kable(counts_table, format = "html", escape = FALSE))
  cat("<br><br>")  # Add HTML line breaks for spacing
}
Week 0 samples
2    wheat
3    rice
2    soy


Week 10 samples
2    wheat
4    rice
3    soy


Week 30 samples
1    noPlant


Week 40 samples
1    noPlant
3    wheat
3    rice
3    soy



Line plots

# Generic function to create line plots for any timepoint
create_timepoint_line_plot <- function(data, timepoint_week, title = NULL) {
  # Filter data for specific timepoint
  filtered_data <- data %>%
    filter(timepoint == timepoint_week) %>%
    mutate(crop = factor(crop, levels = c("noPlant", "wheat", "rice", "soy")))
  
  # Create title if not provided
  if (is.null(title)) {
    week_num <- str_extract(timepoint_week, "\\d+")
    title <- paste("Roots at week", week_num)
  }
  
  # Create the plot - offset lines vertically by crop like ridge plots
  # Only include crops that are actually present in the filtered data
  all_crop_levels <- c("noPlant", "wheat", "rice", "soy")
  present_crops <- intersect(all_crop_levels, unique(filtered_data$crop))
  len <- length(present_crops)
  # Scale the data to match ridge plot proportions
  max_abs <- max(filtered_data$normalized_absorbance, na.rm = TRUE)
  # Determine y-position for annotations based on data
  text_y <- len * 2 + 1.75
  # Create offsets only for crops that are present
  crop_offsets <- setNames(seq(0, (len-1) * 3, 3), present_crops)

  # Add offset to data
  filtered_data_offset <- filtered_data %>%
    mutate(crop_offset = crop_offsets[as.character(crop)],
           normalized_absorbance_offset = (normalized_absorbance*4.444 + crop_offset))
  line_plot <- filtered_data_offset %>%
    ggplot(mapping = aes(x = wavenumber,
                         y = normalized_absorbance_offset,
                         group = filename,
                         color = crop)) +
    xlim(3200, 1100) +
    geom_line(alpha = 0.7) +
    scale_y_continuous(
      breaks = crop_offsets + 1.111,  # move labels up
      labels = names(crop_offsets)
    ) +
    theme_classic() +
    theme(axis.text.y = element_text(size = 12),
          legend.position = "none") +
    scale_fill_manual(values = crop_colors) +
    scale_color_manual(values = crop_colors) +
    ggtitle(title) +
    labs(x = expression("wavenumber (cm"^-1*")"), 
         y = "", 
         color = "Crop") +
      # Suberin peak annotations
    # Aliphatic suberin region (3000-2850 cm-1)
    annotate("rect", xmin = 2825, xmax = 3000, ymin = -Inf, ymax = Inf, 
             alpha = 0.06, fill = "#5076ab") +
    # Specific aliphatic peaks
    # geom_vline(xintercept = 2923, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    # geom_vline(xintercept = 2930, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    # asymmetric CH2 stretch region (2930, 2925, 2919 cm-1)
    annotate("rect", xmin = 2922, xmax = 2932, ymin = -Inf, ymax = Inf, 
             alpha = 0.27, fill = "#5076ab") +
    # geom_vline(xintercept = 2854, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    # geom_vline(xintercept = 2856, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    annotate("rect", xmin = 2853, xmax = 2858, ymin = -Inf, ymax = Inf, 
             alpha = 0.27, fill = "#5076ab") +

    # Suberin esters (1737-1740 cm-1)
    annotate("rect", xmin = 1730, xmax = 1748, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#4d653a") +
    # geom_vline(xintercept = 1739, linetype = "dashed", linewidth = 0.23, color = "#4d653a") +
    # suberin fingerprint (1464, 1240, 1174 cm-1)
    geom_vline(xintercept = 1464, linetype = "dotted", linewidth = 0.33, color = "#5f3e38") +
    geom_vline(xintercept = 1240, linetype = "dotted", linewidth = 0.33, color = "#5f3e38") +
    geom_vline(xintercept = 1174, linetype = "dotted", linewidth = 0.33, color = "#5f3e38") +

   # spacer
    annotate("text", x = 1900, y = 11, label = " ", 
             size = 3.3, hjust = 0.5) +
    # Add aliphatic annotation
    annotate("text", x = 2626, y = text_y*1.19, label = "aliphatic suberin", 
             size = 3.3, hjust = 0.5) +
    # Add esters annotation
    annotate("text", x = 1920, y = text_y*1.32, label = "suberin esters", 
             size = 3.3, hjust = 0.5) +
    # Add fingerprint annotation
    annotate("label", x = 1310, y = text_y*1.4, label = "suberin fingerprint", 
             size = 3.3, hjust = 0.5, fill = "white", label.size = NA)

  return(list(plot = line_plot, data = filtered_data))
}

# Generic function to create sample count tables
create_sample_count_table <- function(filtered_data, timepoint_week) {
  week_num <- str_extract(timepoint_week, "\\d+")
  col_name <- paste("Week", week_num, "samples")
  
  counts <- filtered_data %>%
    select(filename, crop) %>%
    distinct() %>%
    count(crop) %>%
    arrange(match(crop, c("noPlant", "wheat", "rice", "soy"))) %>%
    mutate(!!col_name := paste(n, "&nbsp;&nbsp;", crop)) %>%
    select(all_of(col_name))
  
  return(counts)
}

# Create plots and tables for each timepoint
timepoints <- sort(unique(dpt_data_roots$timepoint))

for (tp in timepoints) {
  # Create plot and get filtered data
  result <- create_timepoint_line_plot(dpt_data_roots, tp)
  
  # Print the plot
  print(result$plot)
  
  # Create and print sample count table with results='asis'
  counts_table <- create_sample_count_table(result$data, tp)
  cat(knitr::kable(counts_table, format = "html", escape = FALSE))
  cat("<br><br>")  # Add HTML line breaks for spacing
}
Week 0 samples
2    wheat
3    rice
2    soy


Week 10 samples
2    wheat
4    rice
3    soy


Week 30 samples
1    noPlant


Week 40 samples
1    noPlant
3    wheat
3    rice
3    soy



Plotting by crop (with suberin annotations)

Ridge Plots

# Define crop-specific color palettes
crop_timepoint_colors <- list(
  noPlant = c("wk0" = "#B68ADE", "wk10" = "#9C6BC7", "wk20" = "#7B41A9",
              "wk30" = "#6f3e94", "wk40" = "#3F1C59"),
  wheat = c("wk0" = "#B4D586", "wk10" = "#98C559", "wk20" = "#79A63A",
            "wk30" = "#58792A", "wk40" = "#374B1B"),
  rice = c("wk0" = "#FDB35E", "wk10" = "#FC9D33", "wk29" = "#F18204",
           "wk40" = "#B56203"),
  soy = c("wk0" = "#FE71B1", "wk10" = "#FE318E", "wk20" = "#F4016E",
          "wk30" = "#B70153", "wk40" = "#7A0137")
)
# Generic function to create crop-specific DRIFTS plots with suberin annotations 
create_crop_plot <- function(data, crop_name, color_palette) {
  # Filter data for specific crop
  crop_data <- data %>%
    group_by(timepoint) %>%
    filter(crop == crop_name)
  
  # Ensure absorbance is numeric
  crop_data$normalized_absorbance <- as.numeric(crop_data$normalized_absorbance)
  
  # Create the plot
  crop_plot <- ggplot(crop_data,
                      aes(x = wavenumber,
                          y = normalized_absorbance,
                          color = timepoint)) +
    xlim(3200, 1100) +
    geom_line() +
    theme_classic() +
    scale_color_manual(values = color_palette) +
    ggtitle(paste(str_to_title(crop_name), "root suberin")) +
    labs(x = expression("wavenumber (cm"^-1*")"), 
         y = "normalized absorbance") +
   # Suberin peak annotations
    # Aliphatic suberin region (3000-2850 cm-1)
    annotate("rect", xmin = 2825, xmax = 3000, ymin = -Inf, ymax = Inf, 
             alpha = 0.06, fill = "#5076ab") +
    # Specific aliphatic peaks
    geom_vline(xintercept = 2923, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    geom_vline(xintercept = 2930, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    # asymmetric CH2 stretch region (2930, 2925, 2919 cm-1)
    annotate("rect", xmin = 2922, xmax = 2932, ymin = -Inf, ymax = Inf, 
             alpha = 0.27, fill = "#5076ab") +
    # geom_vline(xintercept = 2854, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    # geom_vline(xintercept = 2856, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    annotate("rect", xmin = 2853, xmax = 2858, ymin = -Inf, ymax = Inf, 
             alpha = 0.27, fill = "#5076ab") +

    # Suberin esters (1737-1740 cm-1)
    annotate("rect", xmin = 1730, xmax = 1748, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#4d653a") +
    # geom_vline(xintercept = 1739, linetype = "dashed", linewidth = 0.23, color = "#4d653a") +

    # Suberin fingerprint (1464, 1240, 1174 cm-1)
    geom_vline(xintercept = 1464, linetype = "dotted", linewidth = 0.33, color = "#5f3e38") +
    geom_vline(xintercept = 1240, linetype = "dotted", linewidth = 0.33, color = "#5f3e38") +
    geom_vline(xintercept = 1174, linetype = "dotted", linewidth = 0.33, color = "#5f3e38") +

    # Add aliphatic annotation
    annotate("text", x = 2626, y = max(crop_data$normalized_absorbance)-0.05, label = "aliphatic suberin", 
             size = 3.3, hjust = 0.5) +
    # Add esters annotation
    annotate("text", x = 1919, y = max(crop_data$normalized_absorbance), label = "suberin esters", 
             size = 3.3, hjust = 0.5) +
    # Add fingerprint annotation
    annotate("label", x = 1300, y = max(crop_data$normalized_absorbance)+0.04, label = "suberin fingerprint", size = 3.3, hjust = 0.5, fill = "white", label.size = NA)

  
  return(list(plot = crop_plot, data = crop_data))
}

# Generate plots and tables for each crop
crops <- c("noPlant", "wheat", "rice", "soy")

for (crop in crops) {
  # Create plot and get data
  result <- create_crop_plot(dpt_data_roots, crop, crop_timepoint_colors[[crop]])
  
  # Print the plot
  print(result$plot)
  cat("<br>")  # Add space between plot and table
  
  # Create and print ID combinations table
  id_table <- create_crop_id_table(result$data, crop)
  cat(paste0("<h4>", str_to_title(crop), " pot root samples</h4>"))
  cat(knitr::kable(id_table, format = "html", escape = FALSE, table.attr = 'style="border-collapse: separate; border-spacing: 35px 0;"'))
  cat("<br><br>")  # Add HTML line breaks for spacing
}

Noplant pot root samples

wk30 wk40
052 052



Wheat pot root samples

wk0 wk10 wk40
001, 032 029, 053 029, 053, 109



Rice pot root samples

wk0 wk10 wk40
103, 107 055, 087, 119 055, 087, 119



Soy pot root samples

wk0 wk10 wk40
050, 062 012, 080, 094 030, 080, 094



Combined stacked overlay plot (with suberin annotations)

This chunk creates a single visualization showing all crops and timepoints by stacking the overlay plots vertically with a shared y-axis. Each crop has its own color palette and legend.

# Define crop-specific color palettes (exclude noPlant)
crop_timepoint_colors_three <- list(
  wheat = c("wk0" = "#B4D586", "wk10" = "#98C559", "wk40" = "#374B1B"),
  rice = c("wk0" = "#FDB35E", "wk10" = "#FC9D33", "wk40" = "#B56203"),
  soy = c("wk0" = "#FE71B1", "wk10" = "#FE318E", "wk40" = "#7A0137")
)

# Prepare data with three crops only (ordered from bottom to top: wheat, rice, soy)
crops_ordered_three <- c("soy", "rice", "wheat")  # Reversed for plotting so soy appears at top
three_crops_data <- dpt_data_roots %>%
  filter(crop %in% crops_ordered_three) %>%
  mutate(crop = factor(crop, levels = crops_ordered_three)) %>%
  mutate(normalized_absorbance = as.numeric(normalized_absorbance))

# Calculate vertical offset for each crop to create stacking effect
offset_amount_three <- 0.7  # Space between each crop's spectra
crop_offsets_three <- setNames(seq(0, length(crops_ordered_three) - 1) * offset_amount_three, crops_ordered_three)

# Add offset to absorbance values based on crop
three_crops_data <- three_crops_data %>%
  mutate(offset_absorbance = normalized_absorbance + crop_offsets_three[as.character(crop)])

# Determine max y position for annotations
max_y_three <- max(three_crops_data$offset_absorbance, na.rm = TRUE)

# Create the base plot with functional region annotations
base_plot_three <- ggplot() +
  xlim(3777, 422) +
  theme_classic() +
  labs(x = expression("wavenumber (cm"^-1*")"), 
       y = "normalized absorbance") +
  theme(
    axis.text.y = element_blank(),
    # axis.ticks.y = element_blank(),
    axis.title.y = element_text(size = 14.5),
    axis.text.x = element_text(size = 14.5, face = "bold"),
    axis.title.x = element_text(size = 15.5),
    legend.position = "right",
    legend.justification = c(0, 0.07),  # (x, y) position; 0,0 is bottom-right
    legend.box = "vertical",
    legend.spacing.y = unit(2.8, "cm"),
    legend.title = element_text(size = 15, face = "bold"),
    legend.text = element_text(size = 14.5),
    plot.margin = margin(7, 14, 7, 7, "pt")  # (top, right, bottom, left)
  ) +
  scale_y_continuous(
    breaks = crop_offsets_three + 0.003, # y-tick positions
    labels = NULL,
    expand = expansion(mult = c(0.003, 0.015)) # (bottom padding, top padding)
  ) +
  scale_x_reverse(
    limits = c(3770, 415),
    breaks = seq(3500, 500, -500),  # Ticks every 500 from 3500 down to 500
    expand = c(0.002, 0.003)  # Remove padding on x-axis
  ) +
  
    # Suberin peak annotations
    # Aliphatic suberin region (3000-2850 cm-1)
    annotate("rect", xmin = 2825, xmax = 2999, ymin = -Inf, ymax = Inf, 
             alpha = 0.06, fill = "#5076ab") +
    # Specific aliphatic peaks
    # geom_vline(xintercept = 2923, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    # geom_vline(xintercept = 2930, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    # asymmetric CH2 stretch region (2930, 2925, 2919 cm-1)
    annotate("rect", xmin = 2922, xmax = 2932, ymin = -Inf, ymax = Inf, 
             alpha = 0.27, fill = "#5076ab") +
    # geom_vline(xintercept = 2854, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    # geom_vline(xintercept = 2856, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    annotate("rect", xmin = 2853, xmax = 2858, ymin = -Inf, ymax = Inf, 
             alpha = 0.27, fill = "#5076ab") +

    # Suberin esters (1737-1740 cm-1)
    annotate("rect", xmin = 1730, xmax = 1748, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#4d653a") +
    # geom_vline(xintercept = 1739, linetype = "dashed", linewidth = 0.23, color = "#4d653a") +

    # Suberin fingerprint (1464, 1240, 1174 cm-1)
    geom_vline(xintercept = 1464, linetype = "dotted", linewidth = 0.5, color = "#5f3e38") +
    geom_vline(xintercept = 1240, linetype = "dotted", linewidth = 0.5, color = "#5f3e38") +
    geom_vline(xintercept = 1174, linetype = "dotted", linewidth = 0.5, color = "#5f3e38") +
  # Add aliphatic annotation
  annotate("text", x = 2620, y = max_y_three - 0.03, label = "aliphatic suberin", 
            size = 4.8, hjust = 0.5) +
  # Add esters annotation
  annotate("text", x = 1930, y = max_y_three - 0.01, label = "suberin esters", 
            size = 4.8, hjust = 0.5) +
  # Add fingerprint annotation
  annotate("label", x = 1290, y = max_y_three, label = "suberin fingerprint", 
            size = 4.8, hjust = 0.5, fill = "white", label.size = NA)
 
# Add all crop layers without automatic legends, explicitly coloring each line
combined_overlay_plot_suberin <- base_plot_three

for (crop in crops_ordered_three) {
  crop_data <- three_crops_data %>% filter(crop == !!crop)
  
  for (tp in names(crop_timepoint_colors_three[[crop]])) {
    tp_data <- crop_data %>% filter(timepoint == tp)
    
    combined_overlay_plot_suberin <- combined_overlay_plot_suberin +
      geom_line(data = tp_data,
                aes(x = wavenumber, y = offset_absorbance),
                color = crop_timepoint_colors_three[[crop]][tp],
                linewidth = 0.5,
                show.legend = FALSE)
  }
}

# Create dummy data for manual legends - one for each crop
dummy_wheat <- data.frame(
  x = rep(3777, 3), y = rep(0, 3),
  wheat_tp = names(crop_timepoint_colors_three$wheat)
)

dummy_rice <- data.frame(
  x = rep(3777, 3), y = rep(0, 3),
  rice_tp = names(crop_timepoint_colors_three$rice)
)

dummy_soy <- data.frame(
  x = rep(3777, 3), y = rep(0, 3),
  soy_tp = names(crop_timepoint_colors_three$soy)
)

# Add invisible layers with separate aesthetics for each crop
combined_overlay_plot_suberin <- combined_overlay_plot_suberin +
  # Wheat legend
  geom_line(data = dummy_wheat,
            aes(x = x, y = y, color = wheat_tp),
            linewidth = 0, alpha = 0) +
  scale_color_manual(
    name = "Wheat",
    values = crop_timepoint_colors_three$wheat,
    breaks = names(crop_timepoint_colors_three$wheat),
    labels = names(crop_timepoint_colors_three$wheat),
    guide = guide_legend(
      override.aes = list(linewidth = 2, alpha = 1),
      order = 1
    )
  ) +
  ggnewscale::new_scale_color() +
  # Rice legend
  geom_line(data = dummy_rice,
            aes(x = x, y = y, color = rice_tp),
            linewidth = 0, alpha = 0) +
  scale_color_manual(
    name = "Rice",
    values = crop_timepoint_colors_three$rice,
    breaks = names(crop_timepoint_colors_three$rice),
    labels = names(crop_timepoint_colors_three$rice),
    guide = guide_legend(
      override.aes = list(linewidth = 2, alpha = 1),
      order = 2
    )
  ) +
  ggnewscale::new_scale_color() +
  # Soy legend
  geom_line(data = dummy_soy,
            aes(x = x, y = y, color = soy_tp),
            linewidth = 0, alpha = 0) +
  scale_color_manual(
    name = "Soy",
    values = crop_timepoint_colors_three$soy,
    breaks = names(crop_timepoint_colors_three$soy),
    labels = names(crop_timepoint_colors_three$soy),
    guide = guide_legend(
      override.aes = list(linewidth = 2, alpha = 1),
      order = 3
    )
  )

print(combined_overlay_plot_suberin)

Line Plots

# Generic function to create line plots for any crop with suberin annotations
# Define crop-specific color palettes
crop_timepoint_colors <- list(
  noPlant = c("wk0" = "#B68ADE", "wk10" = "#9C6BC7", "wk20" = "#7B41A9",
              "wk30" = "#6f3e94", "wk40" = "#3F1C59"),
  wheat = c("wk0" = "#B4D586", "wk10" = "#98C559", "wk20" = "#79A63A",
            "wk30" = "#58792A", "wk40" = "#374B1B"),
  rice = c("wk0" = "#FDB35E", "wk10" = "#FC9D33", "wk29" = "#F18204",
           "wk40" = "#B56203"),
  soy = c("wk0" = "#FE71B1", "wk10" = "#FE318E", "wk20" = "#F4016E",
          "wk30" = "#B70153", "wk40" = "#7A0137")
)

create_crop_line_plot <- function(data, crop_name, color_palette) {
 # Filter data for specific crop
  crop_data <- data %>%
    group_by(timepoint) %>%
    filter(crop == crop_name)
  
  # Ensure absorbance is numeric
  crop_data$normalized_absorbance <- as.numeric(crop_data$normalized_absorbance)
  
  # Create the plot
  # Scale the data to match ridge plot proportions
  max_abs <- max(crop_data$normalized_absorbance, na.rm = TRUE)
  line_plot <- crop_data %>%
    ggplot(mapping = aes(x = wavenumber,
                         y = normalized_absorbance,
                         group = filename,
                         color = timepoint)) +
    xlim(3200, 1100) +
    geom_line(alpha = 0.8) +
    theme_classic() +
    scale_color_manual(values = color_palette) +
    ggtitle(paste(str_to_title(crop_name), "root suberin")) +
    labs(x = expression("wavenumber (cm"^-1*")"), 
         y = "normalized absorbance") +
    # Suberin peak annotations
    # Aliphatic suberin region (3000-2850 cm-1)
    annotate("rect", xmin = 2825, xmax = 3010, ymin = -Inf, ymax = Inf, 
             alpha = 0.07, fill = "#5076ab") +
    # Specific aliphatic peaks
    geom_vline(xintercept = 2923, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    geom_vline(xintercept = 2930, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    # asymmetric CH2 stretch region (2930, 2925, 2919 cm-1)
    annotate("rect", xmin = 2923, xmax = 2930, ymin = -Inf, ymax = Inf, 
             alpha = 0.22, fill = "#5076ab") +
    geom_vline(xintercept = 2854, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    geom_vline(xintercept = 2856, linetype = "longdash", linewidth = 0.23, color = "#3D6AA9") +
    annotate("rect", xmin = 2854, xmax = 2856, ymin = -Inf, ymax = Inf, 
             alpha = 0.22, fill = "#5076ab") +

    # Suberin esters (1737-1740 cm-1)
    annotate("rect", xmin = 1730, xmax = 1742, ymin = -Inf, ymax = Inf, 
             alpha = 0.15, fill = "#4d653a") +
    geom_vline(xintercept = 1737, linetype = "dashed", linewidth = 0.23, color = "#4d653a") +

    # Aromatic suberin (1464, 1240, 1174 cm-1)
    geom_vline(xintercept = 1464, linetype = "dotted", linewidth = 0.33, color = "#5f3e38") +
    geom_vline(xintercept = 1240, linetype = "dotted", linewidth = 0.33, color = "#5f3e38") +
    geom_vline(xintercept = 1174, linetype = "dotted", linewidth = 0.33, color = "#5f3e38") +

    # Add aliphatic annotation
    annotate("text", x = 2626, y = max(crop_data$normalized_absorbance)-0.05, label = "aliphatic suberin", 
             size = 3.3, hjust = 0.5, color = "#163a6c") +
    # Add esters annotation
    annotate("text", x = 1919, y = max(crop_data$normalized_absorbance), label = "suberin esters", 
             size = 3.3, hjust = 0.5, color = "#2b3c1d") +
    # Add fingerprint annotation
    annotate("text", x = 1296, y = max(crop_data$normalized_absorbance)+0.02, label = "suberin fingerprint", 
             size = 3.3, hjust = 0.5, color = "#3f2622")
  return(list(plot = line_plot, data = crop_data))
}

# Generate plots and tables for each crop
crops <- c("noPlant", "wheat", "rice", "soy")

for (crop in crops) {
  # Create plot and get data
  result <- create_crop_line_plot(dpt_data_roots, crop, crop_timepoint_colors[[crop]])
  
  # Print the plot
  print(result$plot)
  cat("<br>")  # Add space between plot and table
  
  # Create and print ID combinations table
  id_table <- create_crop_id_table(result$data, crop)
  cat(paste0("<h4>", str_to_title(crop), " pot root samples</h4>"))
  cat(knitr::kable(id_table, format = "html", escape = FALSE, table.attr = 'style="border-collapse: separate; border-spacing: 35px 0;"'))
  cat("<br><br>")  # Add HTML line breaks for spacing
}

Noplant pot root samples

wk30 wk40
052 052



Wheat pot root samples

wk0 wk10 wk40
001, 032 029, 053 029, 053, 109



Rice pot root samples

wk0 wk10 wk40
103, 107 055, 087, 119 055, 087, 119



Soy pot root samples

wk0 wk10 wk40
050, 062 012, 080, 094 030, 080, 094



Summary

# Print summary information
cat("Summary of FTIR Analysis:\n")
## Summary of FTIR Analysis:
cat("Number of samples:", nrow(ftir_data), "\n")
## Number of samples: 32
cat("Crops:", paste(unique(ftir_data$crop), collapse = ", "), "\n")
## Crops: wheat, soy, rice, noPlant
cat("Timepoints:", paste(unique(ftir_data$timepoint), collapse = ", "), "\n")
## Timepoints: wk0, wk10, wk40, wk30
# Display summary statistics
print("Crop Statistics:")
## [1] "Crop Statistics:"
print(crop_stats)
## # A tibble: 4 × 7
##   crop    Mean_simple_plant_prop SE_simple_plant_prop Mean_complex_plant_prop
##   <fct>                    <dbl>                <dbl>                   <dbl>
## 1 noPlant                  0.397              0.0354                    0.205
## 2 wheat                    0.430              0.0103                    0.192
## 3 rice                     0.451              0.00973                   0.182
## 4 soy                      0.368              0.0292                    0.199
## # ℹ 3 more variables: SE_complex_plant_prop <dbl>, Mean_microbial_prop <dbl>,
## #   SE_microbial_prop <dbl>
print("\nTimepoint Statistics:")
## [1] "\nTimepoint Statistics:"
print(timepoint_stats)
## # A tibble: 4 × 7
##   timepoint Mean_simple_plant_prop SE_simple_plant_prop Mean_complex_plant_prop
##   <chr>                      <dbl>                <dbl>                   <dbl>
## 1 wk0                        0.436              0.0204                    0.181
## 2 wk10                       0.398              0.0313                    0.190
## 3 wk30                       0.362             NA                         0.216
## 4 wk40                       0.412              0.00620                   0.203
## # ℹ 3 more variables: SE_complex_plant_prop <dbl>, Mean_microbial_prop <dbl>,
## #   SE_microbial_prop <dbl>